{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# EqualWidthDiscretiser\n",
    "\n",
    "The EqualWidthDiscretiser() divides continuous numerical variables into\n",
    "intervals of the same width, that is, equidistant intervals. Note that the\n",
    "proportion of observations per interval may vary.\n",
    "\n",
    "The number of intervals\n",
    "in which the variable should be divided must be indicated by the user.\n",
    "\n",
    "**Note**\n",
    "\n",
    "For this demonstration, we use the Ames House Prices dataset produced by Professor Dean De Cock:\n",
    "\n",
    "Dean De Cock (2011) Ames, Iowa: Alternative to the Boston Housing\n",
    "Data as an End of Semester Regression Project, Journal of Statistics Education, Vol.19, No. 3\n",
    "\n",
    "http://jse.amstat.org/v19n3/decock.pdf\n",
    "\n",
    "https://www.tandfonline.com/doi/abs/10.1080/10691898.2011.11889627\n",
    "\n",
    "The version of the dataset used in this notebook can be obtained from [Kaggle](https://www.kaggle.com/c/house-prices-advanced-regression-techniques/data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "from feature_engine.discretisation import EqualWidthDiscretiser\n",
    "\n",
    "plt.rcParams[\"figure.figsize\"] = [15,5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Id</th>\n",
       "      <th>MSSubClass</th>\n",
       "      <th>MSZoning</th>\n",
       "      <th>LotFrontage</th>\n",
       "      <th>LotArea</th>\n",
       "      <th>Street</th>\n",
       "      <th>Alley</th>\n",
       "      <th>LotShape</th>\n",
       "      <th>LandContour</th>\n",
       "      <th>Utilities</th>\n",
       "      <th>...</th>\n",
       "      <th>PoolArea</th>\n",
       "      <th>PoolQC</th>\n",
       "      <th>Fence</th>\n",
       "      <th>MiscFeature</th>\n",
       "      <th>MiscVal</th>\n",
       "      <th>MoSold</th>\n",
       "      <th>YrSold</th>\n",
       "      <th>SaleType</th>\n",
       "      <th>SaleCondition</th>\n",
       "      <th>SalePrice</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>65.0</td>\n",
       "      <td>8450</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2008</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>208500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>20</td>\n",
       "      <td>RL</td>\n",
       "      <td>80.0</td>\n",
       "      <td>9600</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>2007</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>181500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>68.0</td>\n",
       "      <td>11250</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "      <td>2008</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>223500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>70</td>\n",
       "      <td>RL</td>\n",
       "      <td>60.0</td>\n",
       "      <td>9550</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2006</td>\n",
       "      <td>WD</td>\n",
       "      <td>Abnorml</td>\n",
       "      <td>140000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>84.0</td>\n",
       "      <td>14260</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "      <td>2008</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>250000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 81 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Id  MSSubClass MSZoning  LotFrontage  LotArea Street Alley LotShape  \\\n",
       "0   1          60       RL         65.0     8450   Pave   NaN      Reg   \n",
       "1   2          20       RL         80.0     9600   Pave   NaN      Reg   \n",
       "2   3          60       RL         68.0    11250   Pave   NaN      IR1   \n",
       "3   4          70       RL         60.0     9550   Pave   NaN      IR1   \n",
       "4   5          60       RL         84.0    14260   Pave   NaN      IR1   \n",
       "\n",
       "  LandContour Utilities  ... PoolArea PoolQC Fence MiscFeature MiscVal MoSold  \\\n",
       "0         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      2   \n",
       "1         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      5   \n",
       "2         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      9   \n",
       "3         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      2   \n",
       "4         Lvl    AllPub  ...        0    NaN   NaN         NaN       0     12   \n",
       "\n",
       "  YrSold  SaleType  SaleCondition  SalePrice  \n",
       "0   2008        WD         Normal     208500  \n",
       "1   2007        WD         Normal     181500  \n",
       "2   2008        WD         Normal     223500  \n",
       "3   2006        WD        Abnorml     140000  \n",
       "4   2008        WD         Normal     250000  \n",
       "\n",
       "[5 rows x 81 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = pd.read_csv('housing.csv')\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "X_train : (1022, 79)\n",
      "X_test : (438, 79)\n"
     ]
    }
   ],
   "source": [
    "# let's separate into training and testing set\n",
    "X = data.drop([\"Id\", \"SalePrice\"], axis=1)\n",
    "y = data.SalePrice\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=0.3, random_state=0)\n",
    "\n",
    "print(\"X_train :\", X_train.shape)\n",
    "print(\"X_test :\", X_test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA2cAAAE/CAYAAADCCbvWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAepElEQVR4nO3df7Cld10f8PfHBBIkKIkh65KkbGhTK5gS6E6M0qG3BE0gjomOOLEBNpbOtiMi1nRgI46/pumktjrQUbQpoEv5EcMPJzsEq2nwjmPFRH6EH0mICbCQJUsWUIRlbOzCp3+cZ/WyuXfvPXfvj+fe+3rNnDnnfM/znPM9nz33Pve93+/zPdXdAQAAYH1903p3AAAAAOEMAABgFIQzAACAERDOAAAARkA4AwAAGAHhDAAAYASEM1hEVR2uqqeudz8AANjchDM2paq6qqruqKqvVtWh4fZPVFUtsP3vVNV/nO+x7j6tuz85xWtfU1VdVT+63P4DwEZSVfur6nlTbD9TVQcWeMxxlC1LOGPTqaprk7w2yX9J8u1JtiX5d0meneSx82x/0gp3YVeSvxyuj9fPk1f4dQFgM3AcZcsSzthUqupbk/xykp/o7nd091d64kPdfXV3PzKMkv1mVb2nqr6a5F8u8pxdVf+oqi6uqs/NDXNV9UNV9ZE595+S5F8k2Z3k0qraNuexmao6UFWvqqrPJfntqvqmqtpTVZ+oqi9W1c1Vdcacfd4+vOZfV9UfV9XTV6xYALCKquqUqnpNVT00XF4ztD0+ye8nefJw6sDhqnrysI/jKFuacMZm8z1JTklyyyLb/ask1yd5QpI/WcoTd/efJflqkuce8zxvnXP/JUne393vTHJvkquPeZpvT3JGkqdkcuD5qSRXZnIgenKSv0ryG3O2//0k5yc5K8kHk7xlKX0FgBF4dZKLk1yY5BlJLkryc9391STPT/LQcOrAad390LCP4yhbmnDGZnNmki9095GjDVX1p1X1par6m6p6ztB8S3f/n+7+enf/3yme/21Jfmx43ickecHQdtRL8vdh7a159JSMryf5he5+pLv/Jsm/TfLq7j7Q3Y8k+cUkP3J0qkZ3v3EY/Tv62DOG0UEAGLurk/xydx/q7s8n+aUkL15kH8dRtjThjM3mi0nOnDsPvbu/t7ufODx29DP/4DKf/61JfriqTknyw0k+2N2fTpKqenaS85LcNGfbC6rqwjn7f/6YMPiUJL83hMcvZfK/hF9Lsq2qTqqqG4apGl9Osn/Y58xl9h0A1tKTk3x6zv1PD23zchwF4YzN531JHklyxSLb9XKevLvvyeTg8vw8ekrjriSV5K5hLvwdQ/tLjvO6DyZ5fnc/cc7l1O7+7PD8VyR5XpJvTbJj2GfeFScBYGQeyiQ8HfUPhrZk/uOw4yhbnnDGptLdX8pk2sTrqupHquq04WThC5M8fpHdT6qqU+dcHrWy4+Ctmcxxf06StydJVZ2a5Eczmf9+4ZzLy5NcfZwVpX4ryfXDCdCpqidV1dFg+YRMguYXk3xzkv+02PsHgHX0mLnH0Uym/f/ccGw7M8nPJ3nzsO3DSb7t6BRDx1GYEM7YdLr7V5L8TJJXJjmUyQHgvyd5VZI/Pc6ue5L8zZzLexfY7m1JZpK8t7u/MLRdOezzpu7+3NFLkjckOSnJZQs812uT7Evyh1X1lSR/luS7h8felMko3WeT3DM8BgBj9Z5843H01CTvT/KRJB/NZEGO/5gk3f3xTI6nnxymI/5wHEch1b2s2V0AAACsICNnAAAAIyCcAQAAjIBwBgAAMALCGQAAwAgIZwAAACOw0HdGrIozzzyzd+zYsZYvOVpf/epX8/jHL/a1WxylXtNTs+mo1zf6wAc+8IXuftJ694OlO5FjrM//9NRsemq2POo2vbHX7HjH2DUNZzt27Mj73//+tXzJ0Zqdnc3MzMx6d2PDUK/pqdl0
1OsbVdWn17sPTOdEjrE+/9NTs+mp2fKo2/TGXrPjHWNNawQAABgB4QwAAGAEhDMAAIAREM4AAABGQDgDAAAYAeEMAABgBIQzAACAERDOAAAARkA4AwAAGAHhDAAAYASEMwAAgBE4eb07wMa1Y8+t87bvv+HyNe4JAKtpod/3id/5ACvJyBkAAMAICGcAAAAjIJwBAACMgHAGAAAwAsIZAADACAhnAAAAIyCcAQAAjIBwBgAAMALCGQAAwAgIZwAAACMgnAEAAIyAcAYAADACwhkAAMAILCmcVdW/r6q7q+pjVfW2qjq1qs6oqtuq6v7h+vTV7iwAAMBmtWg4q6qzk/xUkp3d/V1JTkpyVZI9SW7v7vOT3D7cBwAAYBmWOq3x5CSPq6qTk3xzkoeSXJFk7/D43iRXrnz3AAAAtoZFw1l3fzbJf03ymSQHk/x1d/9hkm3dfXDY5mCSs1azowAAAJvZyYttMJxLdkWS85J8Kcnbq+pFS32BqtqdZHeSbNu2LbOzs8vr6SZz+PDhDV+Lay84Mm/7aryvzVCvtaZm01EvAGC9LRrOkjwvyae6+/NJUlXvSvK9SR6uqu3dfbCqtic5NN/O3X1jkhuTZOfOnT0zM7MiHd/oZmdns9Frcc2eW+dt33/1zIq/1mao11pTs+moFwCw3pZyztlnklxcVd9cVZXkkiT3JtmXZNewza4kt6xOFwEAADa/RUfOuvuOqnpHkg8mOZLkQ5mMhJ2W5OaqemkmAe6Fq9lRAACAzWwp0xrT3b+Q5BeOaX4kk1E0AAAATtBSl9IHAABgFQlnAAAAIyCcAQAAjIBwBgBroKpOqqoPVdW7h/tnVNVtVXX/cH36nG2vq6oHquq+qrp0/XoNwFoSzgBgbbwik6+iOWpPktu7+/wktw/3U1VPS3JVkqcnuSzJ66rqpDXuKwDrQDgDgFVWVeckuTzJ6+c0X5Fk73B7b5Ir57Tf1N2PdPenkjyQ5KK16isA60c4A4DV95okr0zy9Tlt27r7YJIM12cN7WcneXDOdgeGNgA2uSV9zxkAsDxV9QNJDnX3B6pqZim7zNPWCzz37iS7k2Tbtm2ZnZ1dVh8PHz583H2vveDIgo8t9zU3usVqxqOp2fKo2/Q2cs2EMwBYXc9O8oNV9YIkpyb5lqp6c5KHq2p7dx+squ1JDg3bH0hy7pz9z0ny0HxP3N03JrkxSXbu3NkzMzPL6uDs7GyOt+81e25d8LH9Vy/vNTe6xWrGo6nZ8qjb9DZyzUxrBIBV1N3Xdfc53b0jk4U+3tvdL0qyL8muYbNdSW4Zbu9LclVVnVJV5yU5P8mda9xtANaBkTOSJDsW+F/R/TdcvsY9Adgybkhyc1W9NMlnkrwwSbr77qq6Ock9SY4keVl3f239ugnAWhHOAGCNdPdsktnh9heTXLLAdtcnuX7NOgbAKJjWCAAAMALCGQAAwAgIZwAAACMgnAEAAIyAcAYAADACwhkAAMAICGcAAAAjIJwBAACMgHAGAAAwAsIZAADACAhnAAAAIyCcAQAAjIBwBgAAMALCGQAAwAgIZwAAACMgnAEAAIzAyevdAcZtx55b17sLAACwJRg5AwAAGAHhDAAAYASEMwAAgBEQzgAAAEZAOAMAABgBqzWO2HJWStx/w+Wr0BMAAGC1GTkDAAAYAeEMAABgBIQzAACAERDOAAAARkA4AwAAGAGrNa6z5azICAAAbD5GzgAAAEbAyBkAsGwLzQDxvZsA0xPONhnTJAEAYGMyrREAAGAEhDMAAIARMK1xBR1vSqG59wAAwPEYOQMAABgB4QwAAGAEhDMAAIAREM4AAABGYEnhrKqeWFXvqKqPV9W9VfU9VXVGVd1WVfcP16evdmcBAAA2q6WOnL02yf/q7n+S5BlJ7k2yJ8nt3X1+ktuH+wAAACzDouGsqr4lyXOSvCFJuvtvu/tLSa5IsnfYbG+SK1erkwAAAJvdUkbOnprk80l+u6o+VFWvr6rHJ9nW3QeT
ZLg+axX7CQAAsKkt5UuoT07yrCQv7+47quq1mWIKY1XtTrI7SbZt25bZ2dnl9HNDuPaCIws+duz7Pnz4cGZnZ4+7z0a1Gv/GR+vF0qnZdNQLAFhvSwlnB5Ic6O47hvvvyCScPVxV27v7YFVtT3Jovp27+8YkNybJzp07e2Zm5sR7PVLX7Ll1wcf2Xz3zDfdnZ2czMzNz3H02qmPf60o4Wi+WTs2mo14AwHpbdFpjd38uyYNV9R1D0yVJ7kmyL8muoW1XkltWpYcAAABbwFJGzpLk5UneUlWPTfLJJD+eSbC7uapemuQzSV64Ol0EAADY/JYUzrr7riQ753nokpXtDgAAwNa01O85AwCWqapOrao7q+rDVXV3Vf3S0H5GVd1WVfcP16fP2ee6qnqgqu6rqkvXr/cArBXhDABW3yNJntvdz0hyYZLLquriTBbYur27z09y+3A/VfW0JFcleXqSy5K8rqpOWpeeA7BmhDMAWGU9cXi4+5jh0kmuSLJ3aN+b5Mrh9hVJburuR7r7U0keSHLRGnYZgHUgnAHAGqiqk6rqrky+eua24StqtnX3wSQZrs8aNj87yYNzdj8wtAGwiS11tUYA4AR099eSXFhVT0zye1X1XcfZvOZ7ikdtVLU7ye4k2bZt27K/SH2xL2G/9oIjUz/nZv9Sd19cPz01Wx51m95GrplwBgBrqLu/VFWzmZxL9nBVbe/ug1W1PZNRtWQyUnbunN3OSfLQPM91Y5Ibk2Tnzp293C9SX+xL2K/Zc+vUz7n/6uX1ZaPwxfXTU7PlUbfpbeSamdYIAKusqp40jJilqh6X5HlJPp5kX5Jdw2a7ktwy3N6X5KqqOqWqzktyfpI717bXAKw1I2cAsPq2J9k7rLj4TUlu7u53V9X7ktxcVS9N8pkkL0yS7r67qm5Ock+SI0leNkyLBGATE84AYJV190eSPHOe9i8muWSBfa5Pcv0qdw2AETGtEQAAYASEMwAAgBEQzgAAAEZAOAMAABgB4QwAAGAEhDMAAIAREM4AAABGwPecseJ27Ll1wcf233D5GvYEAAA2DiNnAAAAIyCcAQAAjIBwBgAAMALCGQAAwAgIZwAAACMgnAEAAIyAcAYAADACwhkAAMAI+BLqNXLsFzNfe8GRXHOcL2sGAAC2FiNnAAAAIyCcAQAAjIBwBgAAMALCGQAAwAgIZwAAACMgnAEAAIyAcAYAADACwhkAAMAICGcAAAAjIJwBAACMgHAGAAAwAievdwfgqB17bp23ff8Nl69xTwAAYO0ZOQMAABgB4QwAAGAEhDMAAIAREM4AAABGQDgDAAAYAeEMAABgBIQzAACAERDOAAAARkA4AwAAGAHhDAAAYASEMwAAgBEQzgAAAEZAOAMAABgB4QwAAGAElhzOquqkqvpQVb17uH9GVd1WVfcP16evXjcBAAA2t2lGzl6R5N459/ckub27z09y+3AfAACAZVhSOKuqc5JcnuT1c5qvSLJ3uL03yZUr2zUAAICt4+QlbveaJK9M8oQ5bdu6+2CSdPfBqjprvh2raneS3Umybdu2zM7OLr+3I3ftBUeWvO22x023/WZxvH//heoxOzubw4cPb+rPzmpQs+moFwCw3hYNZ1X1A0kOdfcHqmpm2hfo7huT3JgkO3fu7JmZqZ9iw7hmz61L3vbaC47kVz+61Gy8eey/embBxxaq3/6rZzI7O5vN/NlZDWo2HfUCANbbUqY1PjvJD1bV/iQ3JXluVb05ycNVtT1JhutDq9ZLANjAqurcqvqjqrq3qu6uqlcM7QsurlVV11XVA1V1X1Vdun69B2CtLBrOuvu67j6nu3ckuSrJe7v7RUn2Jdk1bLYryS2r1ksA2NiOJLm2u78zycVJXlZVT8sCi2sNj12V5OlJLkvyuqo6aV16DsCaOZHvObshyfdV1f1Jvm+4DwAco7sPdvcHh9tfyWT147Oz8OJaVyS5qbsf6e5PJXkgyUVr22sA1tpUJz1192yS2eH2
F5NcsvJdAoDNq6p2JHlmkjuy8OJaZyf5szm7HRjaANjEtt6KFACwTqrqtCTvTPLT3f3lqlpw03naep7nW5EVkRdbrXQ5qwtv9tVPrfA6PTVbHnWb3kaumXAGAGugqh6TSTB7S3e/a2h+uKq2D6NmcxfXOpDk3Dm7n5PkoWOfc6VWRF5stdJpViM+6nir824GVnidnpotj7pNbyPX7ETOOQMAlqAmQ2RvSHJvd//anIcWWlxrX5KrquqUqjovyflJ7lyr/gKwPoycsaZ2LON/XwE2gWcneXGSj1bVXUPbz2aymNbNVfXSJJ9J8sIk6e67q+rmJPdkstLjy7r7a2vfbQDWknAGAKusu/8k859HliywuFZ3X5/k+lXrFACjY1ojAADACBg5WwZT8wAAgJVm5AwAAGAEhDMAAIAREM4AAABGQDgDAAAYAeEMAABgBIQzAACAERDOAAAARkA4AwAAGAHhDAAAYASEMwAAgBEQzgAAAEZAOAMAABgB4QwAAGAEhDMAAIAREM4AAABGQDgDAAAYAeEMAABgBIQzAACAERDOAAAARkA4AwAAGAHhDAAAYASEMwAAgBEQzgAAAEZAOAMAABiBk9e7A7CYHXtuzbUXHMk1e2591GP7b7h8HXoEAAArz8gZAADACAhnAAAAIyCcAQAAjIBwBgAAMALCGQAAwAhYrZENbcc8KzgmVnEEAGDjMXIGAAAwAsIZAADACAhnAAAAIyCcAQAAjIBwBgAAMALCGQAAwAgIZwAAACMgnAEAAIyAcAYAADACJ693BwCAcdix59b17gLAlmbkDAAAYASEMwAAgBEQzgAAAEZg0XBWVedW1R9V1b1VdXdVvWJoP6Oqbquq+4fr01e/uwAAAJvTUkbOjiS5tru/M8nFSV5WVU9LsifJ7d19fpLbh/sAAAAsw6LhrLsPdvcHh9tfSXJvkrOTXJFk77DZ3iRXrlYnAQAANrupzjmrqh1JnpnkjiTbuvtgMglwSc5a6c4BwGZQVW+sqkNV9bE5bQueHlBV11XVA1V1X1Vduj69BmCtLfl7zqrqtCTvTPLT3f3lqlrqfruT7E6Sbdu2ZXZ2dhndHJdrLzhyws+x7XEr8zxbxbT12gyfsxN1+PBhdZiCerHKfifJryd505y2o6cH3FBVe4b7rxpOHbgqydOTPDnJ/66qf9zdX1vjPgOwxpYUzqrqMZkEs7d097uG5oerant3H6yq7UkOzbdvd9+Y5MYk2blzZ8/MzJx4r9fZNSvwJZ3XXnAkv/pR3wG+VNPWa//VM6vXmQ1idnY2m+Hnba2oF6upu/94mH0y1xVJZobbe5PMJnnV0H5Tdz+S5FNV9UCSi5K8by36CsD6WcpqjZXkDUnu7e5fm/PQviS7htu7ktyy8t0DgE1rodMDzk7y4JztDgxtAGxySxmKeHaSFyf5aFXdNbT9bJIbktxcVS9N8pkkL1ydLsLK2nGckc/9N1y+hj0BmNd85w30vBuu0KkDR6f1ruR0+80+TdhU6Omp2fKo2/Q2cs0WDWfd/SeZ/0CRJJesbHcAYMtY6PSAA0nOnbPdOUkemu8JVurUgaPTeldi2v5Rm316uanQ01Oz5VG36W3kmk21WiMAsGIWOj1gX5KrquqUqjovyflJ7lyH/gGwxqxIAQCrrKrelsniH2dW1YEkv5AFTg/o7rur6uYk9yQ5kuRlVmoE2BqEMwBYZd39Yws8NO/pAd19fZLrV69HAIyRaY0AAAAjIJwBAACMgHAGAAAwAsIZAADACAhnAAAAIyCcAQAAjIBwBgAAMAK+5wzm2LHn1nnb999w+Rr3BACArcbIGQAAwAgIZwAAACNgWuMCFprexsbg3w8AgI1GOAMAVtzx/pPMebwA8zOtEQAAYASEMwAAgBEQzgAAAEZAOAMAABgB4QwAAGAEhDMAAIAREM4AAABGQDgDAAAYAeEMAABgBIQzAACAETh5vTsAG8GOPbcua7/9N1y+wj0BAGCzMnIGAAAw
AsIZAADACJjWCKtooemQpjsCAHAsI2cAAAAjIJwBAACMgHAGAAAwAsIZAADACAhnAAAAIyCcAQAAjIBwBgAAMALCGQAAwAgIZwAAACMgnAEAAIyAcAYAADACJ693B9bTjj23rncX2KKO99nbf8Pla9gTAADGwsgZAADACAhnAAAAI7ClpzXCGC1nuq2pkAAAG5+RMwAAgBEQzgAAAEbAtEbgURaaWmn6JLASrFgLMD8jZwAAACMgnAEAAIzAppnWaIoETGelv4R9OT+Dfm6BY5lWDWxlRs4AAABG4ITCWVVdVlX3VdUDVbVnpToFAFudYyzA1rPsaY1VdVKS30jyfUkOJPnzqtrX3fesVOeOtdLTsGCzmO9n49oLjuSakfzMbKWf3bWcqmn61+a1HsdYANbfiZxzdlGSB7r7k0lSVTcluSKJAwcAnBjH2Ck4fxXYLE4knJ2d5ME59w8k+e4T6w4AEMfYR1nuCPxKjtwfL+gt53WWExwF0b9n9sDy+Rwtbr0+X9Xdy9ux6oVJLu3ufzPcf3GSi7r75cdstzvJ7uHudyS5b/nd3VTOTPKF9e7EBqJe01Oz6ajXN3pKdz9pvTuxVa3DMdbnf3pqNj01Wx51m97Ya7bgMfZERs4OJDl3zv1zkjx07EbdfWOSG0/gdTalqnp/d+9c735sFOo1PTWbjnoxMmt6jPX5n56aTU/NlkfdpreRa3YiqzX+eZLzq+q8qnpskquS7FuZbgHAluYYC7AFLXvkrLuPVNVPJvmDJCcleWN3371iPQOALcoxFmBrOpFpjenu9yR5zwr1Zasx1XM66jU9NZuOejEqa3yM9fmfnppNT82WR92mt2FrtuwFQQAAAFg5J3LOGQAAACtEOFtBVfXGqjpUVR+b03ZGVd1WVfcP16fPeey6qnqgqu6rqkvntP+zqvro8Nh/q6pa6/ey2qrq3Kr6o6q6t6rurqpXDO3qtYCqOrWq7qyqDw81+6WhXc2Oo6pOqqoPVdW7h/vqBYOqumz4vD9QVXvWuz/roar2Dz/fd1XV+4e2Ffs9UVWnVNXvDu13VNWOtX6PJ2q1/745Xo2qatfwGvdX1a61eccrY4G6/WJVfXb4vN1VVS+Y89iWrlutwd+GG6Jm3e2yQpckz0nyrCQfm9P2K0n2DLf3JPnPw+2nJflwklOSnJfkE0lOGh67M8n3JKkkv5/k+ev93lahVtuTPGu4/YQkfzHURL0WrlklOW24/ZgkdyS5WM0WrdvPJHlrkncP99XLxaU7mSw08okkT03y2OHz/7T17tc61GF/kjOPaVux3xNJfiLJbw23r0ryu+v9npdRo1X9+2ahGiU5I8knh+vTh9unr3c9TrBuv5jkP8yz7ZavW9bgb8ONUDMjZyuou/84yV8e03xFkr3D7b1JrpzTflN3P9Ldn0ryQJKLqmp7km/p7vf15NPypjn7bBrdfbC7Pzjc/kqSe5OcHfVaUE8cHu4+Zrh01GxBVXVOksuTvH5Os3rBxEVJHujuT3b33ya5KZOfA1b298Tc53pHkks22uj7Gvx9s1CNLk1yW3f/ZXf/VZLbkly28u9wdSxQt4Vs+bqt0d+Go6+ZcLb6tnX3wWTyoUty1tB+dpIH52x3YGg7e7h9bPumNQwpPzOTkSD1Oo6aTNG7K8mhTH6JqNnxvSbJK5N8fU6besHEQp/5raaT/GFVfaCqdg9tK/l74u/26e4jSf46ybetwvtYa2tRo836Gf3JqvrIMO3x6BQ9dZtjFf82HH3NhLP1M9//mvVx2jelqjotyTuT/HR3f/l4m87TtuXq1d1f6+4Lk5yTyf8OfddxNt/SNauqH0hyqLs/sNRd5mnbMvViS/LZnnh2dz8ryfOTvKyqnnOcbZfze2Kr1Xkla7QZa/ebSf5hkguTHEzyq0O7ug1W+W/D0ddMOFt9Dw/DqxmuDw3tB5KcO2e7c5I8NLSfM0/7plNVj8nkh+8t3f2uoVm9lqC7v5RkNpMhdzWb37OT
/GBV7c9kutZzq+rNUS84aqHP/JbS3Q8N14eS/F4m0z1X8vfE3+1TVScn+dYsfarbmK1FjTbdZ7S7Hx7+o/XrSf5HJp+3RN2SrMnfhqOvmXC2+vYlObriy64kt8xpv2pYNea8JOcnuXMYrv1KVV08zIF9yZx9No3hvb0hyb3d/WtzHlKvBVTVk6rqicPtxyV5XpKPR83m1d3Xdfc53b0jk5N+39vdL4p6wVF/nuT8qjqvqh6byc/JvnXu05qqqsdX1ROO3k7y/Uk+lpX9PTH3uX4kk99FG2YU4zjWokZ/kOT7q+r0Yfrf9w9tG9bRkDH4oUw+b4m6rdXfhuOv2TSrh7gsusrM2zIZov5/mSTwl2Yyj/X2JPcP12fM2f7Vmawsc1/mrP6WZGcmP6yfSPLrGb4sfDNdkvzzTIaLP5LkruHyAvU6bs3+aZIPDTX7WJKfH9rVbPHazeTvV2tULxeX4TL83v2L4bP96vXuzzq8/6dmstrbh5PcfbQGK/l7IsmpSd6eyWIFdyZ56nq/72XUaVX/vjlejZL866H9gSQ/vt61WIG6/c8kHx2O5fuSbFe3v+vzqv9tuBFqdrSjAAAArCPTGgEAAEZAOAMAABgB4QwAAGAEhDMAAIAREM4AAABGQDgDAAAYAeEMAABgBIQzAACAEfj/zbRLNhWNFzsAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 1080x360 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# we will discretise two continuous variables\n",
    "\n",
    "X_train[[\"LotArea\", 'GrLivArea']].hist(bins=50)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The EqualWidthDiscretiser() works only with numerical variables.\n",
    "A list of variables can be passed as argument. Alternatively, the discretiser\n",
    "will automatically select all numerical variables.\n",
    "\n",
    "The EqualWidthDiscretiser() first finds the boundaries for the intervals for\n",
    "each variable, fit.\n",
    "\n",
    "Then, it transforms the variables, that is, sorts the values into the intervals,\n",
    "transform."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "EqualWidthDiscretiser(bins=10, return_boundaries=False, return_object=False,\n",
       "                      variables=['LotArea', 'GrLivArea'])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "'''\n",
    "Parameters\n",
    "----------\n",
    "\n",
    "bins : int, default=10\n",
    "    Desired number of equal width intervals / bins.\n",
    "\n",
    "variables : list\n",
    "    The list of numerical variables to transform. If None, the\n",
    "    discretiser will automatically select all numerical type variables.\n",
    "\n",
    "return_object : bool, default=False\n",
    "    Whether the numbers in the discrete variable should be returned as\n",
    "    numeric or as object. The decision should be made by the user based on\n",
    "    whether they would like to proceed the engineering of the variable as\n",
    "    if it was numerical or categorical.\n",
    "\n",
    "return_boundaries: bool, default=False\n",
    "    whether the output should be the interval boundaries. If True, it returns\n",
    "    the interval boundaries. If False, it returns integers.\n",
    "'''\n",
    "\n",
    "ewd = EqualWidthDiscretiser(bins=10, variables=['LotArea', 'GrLivArea'])\n",
    "\n",
    "ewd.fit(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'LotArea': [-inf,\n",
       "  22694.5,\n",
       "  44089.0,\n",
       "  65483.5,\n",
       "  86878.0,\n",
       "  108272.5,\n",
       "  129667.0,\n",
       "  151061.5,\n",
       "  172456.0,\n",
       "  193850.5,\n",
       "  inf],\n",
       " 'GrLivArea': [-inf,\n",
       "  768.2,\n",
       "  1202.4,\n",
       "  1636.6,\n",
       "  2070.8,\n",
       "  2505.0,\n",
       "  2939.2,\n",
       "  3373.4,\n",
       "  3807.6,\n",
       "  4241.799999999999,\n",
       "  inf]}"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# binner_dict contains the boundaries of the different bins\n",
    "ewd.binner_dict_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_t = ewd.transform(X_train)\n",
    "test_t = ewd.transform(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([3, 2, 1, 0, 4, 6, 5, 7, 9], dtype=int64)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# the below are the bins into which the observations were sorted\n",
    "train_t['GrLivArea'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>LotArea</th>\n",
       "      <th>GrLivArea</th>\n",
       "      <th>LotArea_binned</th>\n",
       "      <th>GrLivArea_binned</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>64</th>\n",
       "      <td>9375</td>\n",
       "      <td>2034</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>682</th>\n",
       "      <td>2887</td>\n",
       "      <td>1291</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>960</th>\n",
       "      <td>7207</td>\n",
       "      <td>858</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1384</th>\n",
       "      <td>9060</td>\n",
       "      <td>1258</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1100</th>\n",
       "      <td>8400</td>\n",
       "      <td>438</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      LotArea  GrLivArea  LotArea_binned  GrLivArea_binned\n",
       "64       9375       2034               0                 3\n",
       "682      2887       1291               0                 2\n",
       "960      7207        858               0                 1\n",
       "1384     9060       1258               0                 2\n",
       "1100     8400        438               0                 0"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# here I put side by side the original variable and the transformed variable\n",
    "tmp = pd.concat([X_train[[\"LotArea\", 'GrLivArea']],\n",
    "                 train_t[[\"LotArea\", 'GrLivArea']]], axis=1)\n",
    "\n",
    "tmp.columns = [\"LotArea\", 'GrLivArea', \"LotArea_binned\", 'GrLivArea_binned']\n",
    "\n",
    "tmp.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA3sAAAFKCAYAAACkdEbCAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3de7wddXnv8c+XiyAXFSQgkGDQxguoRU1Ri7VUraCooC0KVYqK0vbgrcdag7VFa1Np6+XosdLiNVYFU7wQhSpIBY9WgYAoBFBSQEhBiFfAC5r4nD9mtiw2e++sJHvttdbk83691mvN+s1vZp5ZO3s/eWZ+M5OqQpIkSZLULVsNOwBJkiRJ0uyz2JMkSZKkDrLYkyRJkqQOstiTJEmSpA6y2JMkSZKkDrLYkyRJkqQOstjTnEvyoSR/N6RtJ8kHk/wwyUUbuezCJJVkm0HFNwhJXpDknGHHMZuS3JHkQcOOY0OSHJxkzbDjkDQ+zJFzyxw5PObIuWGxJ5Jcn+SWJDv2tL00yflDDGtQngj8PjC/qg4cdjCzbapkW1UfraqnDTOu2VZVO1XVtf30bb+P3xh0TJK6yRzZHebIezJHdp/FniZsA7xq2EFsrCRbb+QiDwSur6qfDCKezdUeVfX3sjXsI8TD3r6kkWGOHAHmyLsbdo4a9vbVH39hNOGfgL9Icr/JM6Y6Epbk/CQvbadflOQrSd6R5EdJrk3y2237jUluTXLspNXuluTcJLcnuSDJA3vW/bB23g+SfCvJ83rmfSjJKUnOTvIT4PemiHevJCva5VcneVnbfhzwPuAJ7RCHN02x7FZJ3pDkO23cH05y30ndXpLkpiQ3J3lNz7IHJlmZ5Lb2KPDbe+Y9Psl/td/PN5IcPOm7XJrkK8BPgdcnWTkprj9PsqKdPizJ19vt3JjkjT1dv9S+/6jdxye0P4cv96zrt5NcnOTH7ftvT4rlze3P8/Yk5yTZrZ23fZKPJPl+ux8XJ9lj8nfY9r0+yYlJrkwzHOiDSbbvmf/MJJe16/mvJI+atOzrknwT+MlUyaT3SGT7b+Kfk5zVxnxhkge38ya+j2+038fzN2H7b0hyxqTtvzPJu9rpFye5qt32tUn+ZKrvRNJYM0dijow50hw5jqrK1xb+Aq4Hngp8Evi7tu2lwPnt9EKggG16ljkfeGk7/SJgHfBiYGvg74AbgH8GtgOeBtwO7NT2/1D7+Unt/HcCX27n7Qjc2K5rG+AxwPeA/XuW/TFwEM3Biu2n2J8LgPcA2wMHAGuBp/TE+uUZvouXAKuBBwE7td/Jv036Hk5r43xku+6ntvO/ChzTTu8EPL6d3hv4PvCMNubfbz/P6/kubwD2b/f5vu33s6gnrouBo9rpg9ttbwU8CrgFOGKGn9Wv9xnYFfghcEy7raPbz/fvieW/gYcA924/n9zO+xPgM8AO7c/5scB9Zvg3dQWwoN3mV7jr39ZjgFuBx7XrObbtv13Pspe1y957mvUX8Bs9/yZ+ABzY7tNHgdOn6rsp26c50v3TiX1tl7m55+d7GPBgIMDvtn0f0/OzWjPs33Ffvnxt+gtzZO+y5khz5N22jzly5F+e2VOvvwFekWTeJix7XVV9sKrWAx+n+SPwt1V1Z1WdA/wC6B0TflZVfamq7gT+iuZI4gLgmTRDSD5YVeuq6lLgE8Af9ix7ZlV9pap+VVU/7w2iXccTgddV1c+r6jKaI5XH9LkfLwDeXlXXVtUdwInAUZOOnL2pqn5SVZcDH6RJBgC/BH4jyW5VdUdVfa1tfyFwdlWd3cZ8LrCSJrFN+FBVrWr3+cfAmRPrTbIIeBiwAqCqzq+qy9t1fZMmsf5un/t3GHBNVf1bu63TgKuBZ/X0+WBVfbuqfgYsp/nPwMT+3Z8mKayvqkuq6rYZtvXuqrqxqn4ALO35nl4G/GtVXdiuZxlwJ/D4nmXf1S77sz7365NVdVFV
raNJZAfM0Hejtl9V3wEuBY5o5z0Z+OnEz7eqzqqq/67GBcA5wO/0Gbek8WGONEeCOfJu2zdHjj6LPf1aVV0BfBZYsgmL39Iz/bN2fZPbdur5fGPPdu+gOeq0F80Rose1Qwd+lORHNMnlAVMtO4W9gB9U1e09bd+hOXLYj73a/r3LbgP0DsW4cdL8vdrp42iO9l3dDt94Ztv+QODISfv0RGDPGfbpY9z1h/+PgE9X1U8BkjwuyReTrE3yY+BPgd02cf8m9qH3+/luz/RPuevn9m/A54HT2yE6/5hk2xm2Nd339EDgNZO+jwU98ycv24/pYp7Kpmx/8s/jYxMzkjw9ydfSDIn6Ec1/UPr9eUgaE+bIXy9vjryLObJhjhxhFnua7CSaozq9f9gmLtTeoaetN7FsigUTE0l2ohnGcBPNH5ALqup+Pa+dqurPepatGdZ7E7Brkp172vYB/qfPuG6i+UPXu+w67p6oF0yafxNAVV1TVUcDuwP/AJyR5u5tN9IMc+ndpx2r6uQZ9ukcmms2DqD5A/qxnnkfozmCuaCq7gv8C83wiKnWs6H9m9iHDX4/VfXLqnpTVe0H/DbNEeY/nmGRKb8nmu9j6aTvY4f2COqvN7eheDbDpmz/34GDk8wHnkP780iyHc1R9bcCe1TV/YCzuevnIalbzJHmyCmZI82Ro8piT3dTVatphpi8sqdtLc0fuhcm2TrJS2jGX2+OZyR5YpJ7AW8GLqyqG2mOmj4kyTFJtm1fv5Xk4X3GfyPwX8Bb0lws/Siao4kf7TOu04A/T7Jvm2D/Hvh4O/Rhwl8n2SHJ/jTXTXwcIMkLk8yrql8BP2r7rgc+AjwrySHt97d9mmfLzJ9hP9YBZ9DcFGBX4Nye2TvTHJn9eZIDaY6iTVgL/IrmeoqpnE3z/f5Rkm3ai7H3o/neZ5Tk95I8Ms3d3W6jGbKyfoZFTkgyP8muwOtpvyfgvcCftkdfk2THNBfU7zz9qjbLLdz9+9jo7be/A+fTDEm6rqquamfdi+aamrXAuiRPp7n+RlIHmSPNkdMxR5ojR5XFnqbytzQXV/d6GfBamoum96dJFpvjYzRHSH9AcxHzCwDaoSVPA46iOcr1XZojgNttxLqPprkI+ybgU8BJ7TUA/fgAzVCMLwHXAT8HXjGpzwU0F6ifB7y1mustAA4FViW5g+aC+qOquSbiRuBwmj/ma2mOmr2WDf/+fYzmpgD/PimR/i/gb5PcTnMNyfKJGe0wlqXAV9rhF71j7Kmq79McbXwNzc/yL4FnVtX3NhALNEeqz6BJYle138NHNhD/OcC17evv2hhW0vx7ejfNhe+raS6QH5Q3Asva7+N5m7H9iZ/Hr48gt/9eX0nzM/ghzX8qVsxm8JJGjjnSHDkVc6Q5ciSlapBngiVtiZJcT3Mnui8MOxZJkkaJOVJzyTN7kiRJktRBFnuSJEmS1EEO45QkSZKkDvLMniRJkiR1kMWeJEmSJHXQNsMOYHPstttutXDhwmGHIUmaA5dccsn3qmresOMYF+ZISdoyzJQfB1bsJdme5jks27XbOaOqTkryRprnd6xtu76+qs5ulzmR5uGe64FXVtXnZ9rGwoULWbly5YD2QJI0SpJ8Z9gxjBNzpCRtGWbKj4M8s3cn8OSquiPJtsCXk/xHO+8dVfXWSUHuR/OQ0P2BvYAvJHlIVa0fYIySJEmS1EkDu2avGne0H7dtXzPd+vNw4PSqurOqrgNWAwcOKj5JkiRJ6rKB3qAlydZJLgNuBc6tqgvbWS9P8s0kH0iyS9u2N3Bjz+Jr2rbJ6zw+ycokK9euXTt5tiRJkiSJARd7VbW+qg4A5gMHJnkEcArwYOAA4GbgbW33TLWKKdZ5alUtrqrF8+Z5nb4kSZIkTWVOHr1QVT8CzgcOrapb2iLwV8B7uWuo5hpgQc9i84Gb5iI+SZIkSeqagRV7SeYluV87fW/gqcDVSfbs6fYc4Ip2egVwVJLtkuwLLAIuGlR8kiRJ
ktRlgzyztyfwxSTfBC6muWbvs8A/Jrm8bf894M8BqmoVsBy4EvgccIJ34pQkjav2uvRbk1zR07ZrknOTXNO+79Iz78Qkq5N8K8khPe2PbfPm6iTvSjLVZQ+SJN3DIO/G+c2qenRVPaqqHlFVf9u2H1NVj2zbn11VN/css7SqHlxVD62q/5h+7ZIkjbwPAYdOalsCnFdVi4Dz2s+THz90KPCeJFu3y5wCHE8z4mXRFOuUJGlKc3LNniRJW5qq+hLwg0nNhwPL2ullwBE97fd4/FB76cN9quqrVVXAh3uWkSRpRhZ7kiTNnT0mRrS077u37dM9fmjvdnpy+5R8PJEkqZfFniRJwzfd44f6eizRr2f4eCJJUo9thh2AxtPCJWcNZL3Xn3zYQNYrSSPiliR7VtXN7RDNW9v26R4/tKadntw+EIP62z4d/+ZL0mB5Zk+SpLmzAji2nT4WOLOn/R6PH2qHet6e5PHtXTj/uGcZSZJm5Jk9SZIGIMlpwMHAbknWACcBJwPLkxwH3AAcCc3jh5JMPH5oHXd//NCf0dzZ897Af7QvSZI2yGJPkqQBqKqjp5n1lGn6LwWWTtG+EnjELIYmSdpCOIxTkiRJkjrIYk+SJEmSOshiT5IkSZI6yGJPkiRJkjrIYk+SJEmSOshiT5IkSZI6yGJPkiRJkjrIYk+SJEmSOshiT5IkSZI6yGJPkiRJkjrIYk+SJEmSOshiT5IkSZI6yGJPkiRJkjrIYk+SJEmSOshiT5IkSZI6yGJPkiRJkjrIYk+SJEmSOshiT5IkSZI6yGJPkiRJkjrIYk+SJEmSOshiT5IkSZI6yGJPkiRJkjpoYMVeku2TXJTkG0lWJXlT275rknOTXNO+79KzzIlJVif5VpJDBhWbJEmSJHXdIM/s3Qk8uap+EzgAODTJ44ElwHlVtQg4r/1Mkv2Ao4D9gUOB9yTZeoDxSZIkSVJnDazYq8Yd7cdt21cBhwPL2vZlwBHt9OHA6VV1Z1VdB6wGDhxUfJIkSZLUZQO9Zi/J1kkuA24Fzq2qC4E9qupmgPZ997b73sCNPYuvadsmr/P4JCuTrFy7du0gw5ckSZKksTXQYq+q1lfVAcB84MAkj5ihe6ZaxRTrPLWqFlfV4nnz5s1WqJIkSZLUKXNyN86q+hFwPs21eLck2ROgfb+17bYGWNCz2HzgprmIT5IkSZK6ZpB345yX5H7t9L2BpwJXAyuAY9tuxwJnttMrgKOSbJdkX2ARcNGg4pMkSZKkLttmgOveE1jW3lFzK2B5VX02yVeB5UmOA24AjgSoqlVJlgNXAuuAE6pq/QDjkyRJkqTOGlixV1XfBB49Rfv3gadMs8xSYOmgYpIkSZKkLcWcXLMnSZIkSZpbFnuSJEmS1EEWe5IkSZLUQRZ7kiRJktRBFnuSJEmS1EEWe5IkSZLUQRZ7kiRJktRBFnuSJEmS1EEWe5IkSZLUQdsMOwDd3cIlZ836Oq8/+bBZX6ckSZKk0eaZPUmSJEnqIIs9SZIkSeogiz1JkuZYkj9PsirJFUlOS7J9kl2TnJvkmvZ9l57+JyZZneRbSQ4ZZuySpPFhsSdJ0hxKsjfwSmBxVT0C2Bo4ClgCnFdVi4Dz2s8k2a+dvz9wKPCeJFsPI3ZJ0nix2JMkae5tA9w7yTbADsBNwOHAsnb+MuCIdvpw4PSqurOqrgNWAwfOcbySpDFksSdJ0hyqqv8B3grcANwM/LiqzgH2qKqb2z43A7u3i+wN3NizijVt2z0kOT7JyiQr165dO6hdkCSNCYs9SZLmUHst3uHAvsBewI5JXjjTIlO01VQdq+rUqlpcVYvnzZu3+cFKksaaxZ4kSXPrqcB1VbW2qn4JfBL4beCWJHsCtO+3tv3XAAt6lp9PM+xTkqQZWexJkjS3bgAen2SHJAGeAlwFrACObfscC5zZTq8AjkqyXZJ9gUXARXMcsyRpDG0z7AAkSdqSVNWFSc4ALgXWAV8HTgV2ApYnOY6mIDyy
7b8qyXLgyrb/CVW1fijBS5LGisWeJElzrKpOAk6a1HwnzVm+qfovBZYOOi5JUrc4jFOSJEmSOshiT5IkSZI6yGJPkiRJkjrIYk+SJEmSOshiT5IkSZI6yGJPkiRJkjrIYk+SJEmSOshiT5IkSZI6aGDFXpIFSb6Y5Kokq5K8qm1/Y5L/SXJZ+3pGzzInJlmd5FtJDhlUbJIkSZLUddsMcN3rgNdU1aVJdgYuSXJuO+8dVfXW3s5J9gOOAvYH9gK+kOQhVbV+gDFKkiRJUicN7MxeVd1cVZe207cDVwF7z7DI4cDpVXVnVV0HrAYOHFR8kiRJktRlc3LNXpKFwKOBC9umlyf5ZpIPJNmlbdsbuLFnsTXMXBxKkiRJkqYxyGGcACTZCfgE8Oqqui3JKcCbgWrf3wa8BMgUi9cU6zseOB5gn332GVTY6pCFS86a9XVef/Jhs75OSZIkaTYN9Mxekm1pCr2PVtUnAarqlqpaX1W/At7LXUM11wALehafD9w0eZ1VdWpVLa6qxfPmzRtk+JIkSZI0tgZ5N84A7weuqqq397Tv2dPtOcAV7fQK4Kgk2yXZF1gEXDSo+CRJkiSpywY5jPMg4Bjg8iSXtW2vB45OcgDNEM3rgT8BqKpVSZYDV9LcyfME78QpSZIkSZtmYMVeVX2Zqa/DO3uGZZYCSwcVkyRJkiRtKebkbpySJEmSpLllsSdJkiRJHWSxJ0mSJEkdZLEnSZIkSR1ksSdJkiRJHWSxJ0mSJEkdtMFiL8mRSXZup9+Q5JNJHjP40CRJGj7zoCRpXPVzZu+vq+r2JE8EDgGWAacMNixJkkaGeVCSNJb6KfbWt++HAadU1ZnAvQYXkiRJI8U8KEkaS/0Ue/+T5F+B5wFnJ9muz+UkSeoC86AkaSz1k6yeB3weOLSqfgTsCrx2oFFJkjQ6zIOSpLG0wWKvqn4K3Ao8sW1aB1wzyKAkSRoV5kFJ0rjq526cJwGvA05sm7YFPjLIoCRJGhXmQUnSuOpnGOdzgGcDPwGoqpuAnQcZlCRJI8Q8KEkaS/0Ue7+oqgIKIMmOgw1JkqSRYh6UJI2lfoq95e1dyO6X5GXAF4D3DjYsSZJGhnlQkjSWttlQh6p6a5LfB24DHgr8TVWdO/DIJEkaAeZBSdK42mCx1w5X+c+qOjfJQ4GHJtm2qn45+PAkSRou86AkaVz1M4zzS8B2SfamGbryYuBDgwxKkqQRYh6UJI2lfoq9tM8Yei7wf6vqOcB+gw1LkqSRYR6UJI2lvoq9JE8AXgCc1bZtcPinJEkdYR6UJI2lfoq9V9M8SPZTVbUqyYOALw42LEmSRoZ5UJI0lvq5G+cFwAU9n68FXjnIoCRJGhXmQUnSuOrnbpxfpH2QbK+qevJAIpIkaYSYByVJ46qfaw7+omd6e+APgHWDCUeSpJFjHpQkjaV+hnFeMqnpK0kumLKzJEkdYx6UJI2rDd6gJcmuPa/dkhwCPGAOYpMkaegGkQeT3C/JGUmuTnJVkie06z83yTXt+y49/U9MsjrJt9rtS5K0Qf0M47yE5lqF0AxbuQ44bpBBSZI0QgaRB98JfK6q/jDJvYAdgNcD51XVyUmWAEuA1yXZDzgK2B/YC/hCkodU1frNjEGS1HH9DOPcdy4CkSRpFM12HkxyH+BJwIva9f8C+EWSw4GD227LgPOB1wGHA6dX1Z3AdUlWAwcCX53NuCRJ3dPPMM5tk7yyHW5yRpKXJ9m2j+UWJPliOzxlVZJXte0OU5EkjY1NzYMzeBCwFvhgkq8neV+SHYE9qupmgPZ997b/3sCNPcuvadumivX4JCuTrFy7du1mhChJ6oJ+Hqp+CvBY4D3t67Ft24asA15TVQ8HHg+c0A5FWUIzTGURcF77mUnDVA4F3pNk643bHUmSZt2m5sHpbAM8Bjilqh4N/IQ2F04jU7Td41EQAFV1alUtrqrF8+bN24wQJUld0M81e79VVb/Z8/k/k3xjQwu1RyUnjlDenuQq
miORDlORJI2TTcqDM1gDrKmqC9vPZ9AUe7ck2bOqbk6yJ3BrT/8FPcvPB27ajO1LkrYQ/ZzZW5/kwRMfkjwI2KiLwpMsBB4NXMgsDFORJGkObXYe7FVV3wVuTPLQtukpwJXACuDYtu1Y4Mx2egVwVJLtkuwLLAIu2tTtS5K2HP2c2Xst8MUk19IMJXkg8OJ+N5BkJ+ATwKur6rZkqtEoTdcp2u4xTCXJ8cDxAPvss0+/YUiStKk2Kw9O4xXAR9s7cV7brm8rYHmS44AbgCMBqmpVkuU0BeE64ATvxClJ6kc/d+M8L8ki4KE0Se7qdqjlBrUXsH8C+GhVfbJt3qxhKlV1KnAqwOLFi6e8ZkGSpNmyOXlwhnVeBiyeYtZTpum/FFi6OduUJG15+hnGCc3F6I8AfhN4fpI/3tACaU7hvR+4qqre3jPLYSqSpHGz0XlQkqRh2+CZvST/BjwYuIy7rlEo4MMbWPQg4Bjg8iSXtW2vB07GYSqSpDGxGXlQkqSh6ueavcXAflW1UUMmq+rLTH0dHjhMRZI0PjYpD0qSNGz9DOO8AnjAoAORJGlEmQclSWNp2jN7ST5DM0xlZ+DKJBcBv74gvaqePfjwJEkaDvOgJGnczTSM861zFoUkSaPHPChJGmvTFntVdcFcBiJJ0igxD0qSxl2/j16QJEmSJI0Riz1JkiRJ6qBpi70k57Xv/zB34UiSNBrMg5KkcTfTDVr2TPK7wLOTnM6kZ+ZV1aUDjUySpOEyD0qSxtpMxd7fAEuA+cDbJ80r4MmDCkqSpBFgHpQkjbWZ7sZ5BnBGkr+uqjfPYUySJA2deVCSNO5mOrMHQFW9OcmzgSe1TedX1WcHG5YkSaPBPChJGlcbvBtnkrcArwKubF+vatskSeo886AkaVxt8MwecBhwQFX9CiDJMuDrwImDDEySpBFhHpQkjaV+n7N3v57p+w4iEEmSRph5UJI0dvo5s/cW4OtJvkhz2+kn4dFMSdKWwzwoSRpL/dyg5bQk5wO/RZPkXldV3x10YJIkjQLzoCRpXPVzZo+quhlYMeBYJEkaSeZBSdI46veaPUmSJEnSGLHYkyRJkqQOmrHYS7JVkivmKhhJkkaJeVCSNM5mLPbaZwp9I8k+cxSPJEkjwzwoSRpn/dygZU9gVZKLgJ9MNFbVswcWlSRJo8M8KEkaS/0Ue28aeBSSJI0u86AkaSz185y9C5I8EFhUVV9IsgOw9eBDkyRp+MyDkqRxtcG7cSZ5GXAG8K9t097ApwcZlCRJo8I8KEkaV/08euEE4CDgNoCqugbYfZBBSZI0QsyDkqSx1E+xd2dV/WLiQ5JtgBpcSJIkjRTzoCRpLPVT7F2Q5PXAvZP8PvDvwGcGG5YkSSPDPChJGkv9FHtLgLXA5cCfAGcDbxhkUJIkjRDzoCRpLPVzN85fJVkGXEgzbOVbVbXB4StJPgA8E7i1qh7Rtr0ReBlN0gR4fVWd3c47ETgOWA+8sqo+v/G7I0nS7NrUPChJ0rD1czfOw4D/Bt4FvBtYneTpfaz7Q8ChU7S/o6oOaF8Thd5+wFHA/u0y70niba0lSUO3GXlQkqSh6ueh6m8Dfq+qVgMkeTBwFvAfMy1UVV9KsrDPOA4HTq+qO4HrkqwGDgS+2ufykiQNyiblQUmShq2fa/ZunUhwrWuBWzdjmy9P8s0kH0iyS9u2N3BjT581bZskScM223lQkqQ5Me2ZvSTPbSdXJTkbWE5zrcKRwMWbuL1TgDe363kzzdHSlwCZou+U10MkOR44HmCfffbZxDAkSZrZgPKgJElzZqZhnM/qmb4F+N12ei2wyz27b1hV3TIxneS9wGfbj2uABT1d5wM3TbOOU4FTARYvXuwF8pKkQZn1PChJ0lyattirqhfP9saS7FlVN7cfnwNc0U6vAD6W5O3AXsAi4KLZ3r4kSf0aRB6UJGkubfAGLUn2BV4BLOztX1XP3sBypwEHA7slWQOcBByc5ACaYTDX0zyviKpa
lWQ5cCWwDjihqtZv/O5IkjS7NjUPSpI0bP3cjfPTwPuBzwC/6nfFVXX0FM3vn6H/UmBpv+uXJGmObFIelCRp2Pop9n5eVe8aeCSSJI0m86AkaSz1U+y9M8lJwDnAnRONVXXpwKKSJGl0mAclSWOpn2LvkcAxwJO5a/hKtZ8lSeq6geTBJFsDK4H/qapnJtkV+DjNtYHXA8+rqh+2fU8EjgPWA6+sqs9vzrYlSVuGfoq95wAPqqpfDDoYSZJG0KDy4KuAq4D7tJ+XAOdV1clJlrSfX5dkP+AoYH+aO1Z/IclDvJGZJGlDtuqjzzeA+w06EEmSRtSs58Ek84HDgPf1NB8OLGunlwFH9LSfXlV3VtV1wGrgwNmMR5LUTf2c2dsDuDrJxdz9WgVvOS1J2hIMIg/+H+AvgZ17tzPxLNqqujnJ7m373sDXevqtadvuIcnxwPEA++yzz2aEJ0nqgn6KvZMGHoUkSaNrVvNgkmcCt1bVJUkO7meRKdpqqo5VdSpwKsDixYun7CNJ2nJssNirqgvmIhBJkkbRAPLgQcCzkzwD2B64T5KPALck2bM9q7cncGvbfw2woGf5+cBNsxyTJKmDNnjNXpLbk9zWvn6eZH2S2+YiOEmShm2282BVnVhV86tqIc2NV/6zql4IrACObbsdC5zZTq8AjkqyXZJ9gUXARZu8Q5KkLUY/Z/Z6rycgyRF4YbgkaQsxh3nwZGB5kuOAG4Aj2+2vSrIcuBJYB5zgnTglSf3o55q9u6mqT7e3hJYkaYszm3mwqs4Hzm+nvw88ZZp+S4Gls7FNSdKWY4PFXpLn9nzcCljMNBeGS5LUNeZBSdK46ufM3rN6ptcB19M880eSpC2BeVCSNJb6uWbvxXMRiCRJo8g8KEkaV9MWe0n+ZoblqqrePIB4JEkaCeZBSdK4m+nM3k+maNsROA64P2CSkyR1mXlQkjTWpi32quptE9NJdgZeBbwYOB1423TLSZLUBeZBSdK4m/GavSS7Av8beAGwDHhMVf1wLgKTJGnYzIOSpHE20zV7/wQ8FzgVeGRV3TFnUUmSNGTmQUnSuNtqhnmvAfYC3gDclOS29nV7ktvmJjxJkobGPChJGmszXbM3UyEoSVKnmQclSePORCZJkgrHzhQAABKXSURBVCRJHWSxJ0mSJEkdZLEnSZIkSR1ksSdJkiRJHWSxJ0mSJEkdZLEnSZIkSR1ksSdJkiRJHWSxJ0mSJEkdZLEnSZIkSR00sGIvyQeS3Jrkip62XZOcm+Sa9n2XnnknJlmd5FtJDhlUXJIkSZK0JRjkmb0PAYdOalsCnFdVi4Dz2s8k2Q84Cti/XeY9SbYeYGySJEmS1GkDK/aq6kvADyY1Hw4sa6eXAUf0tJ9eVXdW1XXAauDAQcUmSZIkSV0319fs7VFVNwO077u37XsDN/b0W9O23UOS45OsTLJy7dq1Aw1WkiRJksbVqNygJVO01VQdq+rUqlpcVYvnzZs34LAkSZIkaTzNdbF3S5I9Adr3W9v2NcCCnn7zgZvmODZJkiRJ6oy5LvZWAMe208cCZ/a0H5VkuyT7AouAi+Y4NkmSJEnqjG0GteIkpwEHA7slWQOcBJwMLE9yHHADcCRAVa1Kshy4ElgHnFBV6wcVmyRJkiR13cCKvao6eppZT5mm/1Jg6aDikSRJkqQtyajcoEWSJEmSNIss9iRJkiSpgyz2JEmSJKmDLPYkSZIkqYMs9iRJkiSpgyz2JEmSJKmDBvboBUkbZ+GSs2Z9ndeffNisr1OSJEnjwTN7kiRJktRBFnuSJEmS1EEWe5IkSZLUQRZ7kiRJktRBFnuSJEmS1EEWe5IkSZLUQRZ7kiRJktRBFnuSJEmS1EEWe5IkzaEkC5J8MclVSVYleVXbvmuSc5Nc077v0rPMiUlWJ/lWkkOGF70kaZxY7EmSNLfWAa+pqocDjwdOSLIfsAQ4r6oWAee1n2nnHQXsDxwKvCfJ1kOJXJI0Viz2JEmaQ1V1c1Vd
2k7fDlwF7A0cDixruy0DjminDwdOr6o7q+o6YDVw4NxGLUkaRxZ7kiQNSZKFwKOBC4E9qupmaApCYPe2297AjT2LrWnbplrf8UlWJlm5du3aQYUtSRoTFnuSJA1Bkp2ATwCvrqrbZuo6RVtN1bGqTq2qxVW1eN68ebMRpiRpjFnsSZI0x5JsS1PofbSqPtk235Jkz3b+nsCtbfsaYEHP4vOBm+YqVknS+LLYkyRpDiUJ8H7gqqp6e8+sFcCx7fSxwJk97Ucl2S7JvsAi4KK5ileSNL62GXYAkiRtYQ4CjgEuT3JZ2/Z64GRgeZLjgBuAIwGqalWS5cCVNHfyPKGq1s992JKkcWOxJ0nSHKqqLzP1dXgAT5lmmaXA0oEFJUnqJIdxSpIkSVIHWexJkiRJUgdZ7EmSJElSB1nsSZIkSVIHWexJkiRJUgcN5W6cSa4HbgfWA+uqanGSXYGPAwuB64HnVdUPhxGfJEmSJI27YZ7Z+72qOqCqFreflwDnVdUi4Lz2syRJkiRpE4zSMM7DgWXt9DLgiCHGIkmSJEljbVjFXgHnJLkkyfFt2x5VdTNA+777VAsmOT7JyiQr165dO0fhSpIkSdJ4Gco1e8BBVXVTkt2Bc5Nc3e+CVXUqcCrA4sWLa1ABSpIkSdI4G8qZvaq6qX2/FfgUcCBwS5I9Adr3W4cRmyRJkiR1wZwXe0l2TLLzxDTwNOAKYAVwbNvtWODMuY5NkiRJkrpiGMM49wA+lWRi+x+rqs8luRhYnuQ44AbgyCHEJkmSJEmdMOfFXlVdC/zmFO3fB54y1/FIkiRJUheN0qMXJEmSJEmzxGJPkiRJkjrIYk+SJEmSOshiT5IkSZI6yGJPkiRJkjrIYk+SJEmSOshiT5IkSZI6yGJPkiRJkjpozh+qLmm8LVxy1qyv8/qTD5v1dUqSJG3pPLMnSZIkSR1ksSdJkiRJHWSxJ0mSJEkdZLEnSZIkSR1ksSdJkiRJHWSxJ0mSJEkdZLEnSZIkSR20xTxnz2eDSZIkSdqSeGZPkiRJkjrIYk+SJEmSOshiT5IkSZI6yGJPkiRJkjrIYk+SJEmSOshiT5IkSZI6yGJPkiRJkjrIYk+SJEmSOshiT5IkSZI6yGJPkiRJkjpom2EHIEmDsnDJWbO+zutPPmzW1ylJkjQIFnuSJKnzBnHwZzoeFJI0Kkau2EtyKPBOYGvgfVV18pBDkqSB8gyk+mF+lCRtrJG6Zi/J1sA/A08H9gOOTrLfcKOSJGm4zI+SpE0xamf2DgRWV9W1AElOBw4HrhxqVJIkz0AOl/lRU3J4qqSZjFqxtzdwY8/nNcDjhhSLJEmjwvyoLU5XC1n3a/PN9YGHcd63VNWsrnBzJDkSOKSqXtp+PgY4sKpe0dPneOD49uNDgW/Nchi7Ad+b5XUOyrjEapyzb1xiNc7ZNS5xwmBifWBVzZvldY6FfvJj2z7oHDmdcfq3uTG6ul/Q3X1zv8ZPV/dtLvdr2vw4amf21gALej7PB27q7VBVpwKnDiqAJCuravGg1j+bxiVW45x94xKrcc6ucYkTxivWMbHB/AiDz5HT6erPu6v7Bd3dN/dr/HR130Zlv0bqBi3AxcCiJPsmuRdwFLBiyDFJkjRs5kdJ0kYbqTN7VbUuycuBz9PcWvoDVbVqyGFJkjRU5kdJ0qYYqWIPoKrOBs4eYghzPvxlM4xLrMY5+8YlVuOcXeMSJ4xXrGNhBPLjTLr68+7qfkF39839Gj9d3beR2K+RukGLJEmSJGl2jNo1e5IkSZKkWWCxJ0mSJEkdNHLX7M21JA8DDqd5YG3R3Mp6RVVdNdTAxlj7ne4NXFhVd/S0H1pVnxteZHeX5ECgquriJPsBhwJXt9fFjKwkH66qPx52HBuS5InAgcAVVXXOsOOZkORxwFVVdVuSewNLgMcAVwJ/X1U/HmqArSSvBD5VVTdusPOQJXkw8ByaRwOsA64BThuV71Kzz9w5XsYlL2+Kcc3lG2NU8+mm
GKfctrFGNRdu0Wf2krwOOB0IcBHNra0DnJZkyTBj2xhJXjzsGCa0v8RnAq8ArkhyeM/svx9OVPeU5CTgXcApSd4CvBvYCViS5K+GGlyPJCsmvT4DPHfi87Dj65Xkop7pl9F8pzsDJ43Y79MHgJ+20+8E7gv8Q9v2wWEFNYU3Axcm+X9J/leSkXyYePs7/y/A9sBvAfemSXRfTXLwEEPTgHQld26sUcq1G2Nc8vKmGJdcvrHGKJ9uirHIbRtrlHPhFn2DliTfBvavql9Oar8XsKqqFg0nso2T5Iaq2mfYcQAkuRx4QlXdkWQhcAbwb1X1ziRfr6pHDzXAVhvnAcB2wHeB+T1nei6sqkcNNcBWkktpzji9j+boeYDTaJ6xRVVdMLzo7q7355vkYuAZVbU2yY7A16rqkcONsJHkqqp6eDt9aVU9pmfeZVV1wPCiu0uSrwOPBZ4KPB94NnAJzc//k1V1+xDD+7WJ36WqWp9kB+Dsqjo4yT7AmaPyO6/Z05XcubFGKddujHHJy5tiXHL5xhqXfLopxiW3baxRzoVb+jDOXwF7Ad+Z1L5nO29kJPnmdLOAPeYylg3YemKISFVd3x7NOCPJA2liHRXrqmo98NMk/11VtwFU1c+SjNLPfjHwKuCvgNdW1WVJfjZKRV6PrZLsQjNiIFW1FqCqfpJk3XBDu5srkry4qj4IfCPJ4qpameQhwC83tPAcqqr6FXAOcE6SbYGnA0cDbwVG6WjoNsB6mv9w7QxQVTe0Mat7xiZ3bqwxyrUbY1zy8qYYl1y+scYln26KccptG2skc+GWXuy9GjgvyTXAxNjhfYDfAF4+tKimtgdwCPDDSe0B/mvuw5nWd5McUFWXAbRHEp9JM3RulI5E/SLJDlX1U5ojTAAkuS8j9J+V9g/iO5L8e/t+C6P7e3tfmqNzASrJA6rqu0l2YrT+Q/FS4J1J3gB8j2aIxY00fwNeOtTI7u5u31l7FmUFsKI9aj0q3gdcnORrwJNohsTSDs35wTAD08CMU+7cWOOSazfGuOTlTTEWuXwTjEs+3RTjkts21sjmwi16GCdAkq1oLnrdm+Yf4Brg4vZI0chI8n7gg1X15Snmfayq/mgIYd1Dkvk0R9q+O8W8g6rqK0MI6x6SbFdVd07RvhuwZ1VdPoSwNijJYcBBVfX6YcfSr3Y4wx5Vdd2wY+mVZGfgQTTF85qqumXIId1NkodU1beHHUc/kuwPPJzm5gFXDzseDd645M6NNS65dmOMS17eFOOayzfVqObTjTFOuW1jjWou3OKLPUmSJEnqoi36bpySJEmS1FUWe5IkSZLUQRZ7kiRJktRBFntSK8keST6W5NoklyT5apLnTNFvYZIrpmj/2yRP7WM7j05SSQ6Zrdhn2NYd07T3FetmbvtFSd49yG1IkoZjuvwyTd8XJdlrUtu8JL9M8iezH909tn99e9OWye1/muSPB7ztg5N8dpDbkGZisScBSQJ8GvhSVT2oqh5L8+Dy+ZP6TfvYg6r6m6r6Qh+bOxr4cvs+ZSztne4GZiNilSRpc72I5tmMvY4EvsY0uRAgydYDjImq+peq+vAgtyENm8We1Hgy8Iuq+peJhqr6TlX93/aI5L8n+QzNQ0CnlORDSf4wydOTLO9pP7hddqKo/EOaxPe0JNu37QuTXJXkPcClwIIkr01ycZJvJnlTz/o+3Z55XJXk+A3tWJK3Jbk0yXnt815+HWs7fX2SN7V9Lk/ysLb9jUk+kOT89mznK3vW+cIkFyW5LMm/TiTkJC9O8u0kFwAHbfBblyR1RpIDknytzVufSrJLm2sWAx9tc8bEs9SOBl4DzE+yd8867mhHn1wIPGGGfHNKkpVtLnzT5Fim8Np2PRcl+Y12HW9M8hft9PlJ/qGd/+0kv9O2vyjJJ5N8Lsk1Sf6xJ9antaOALm3/n7BT235okquTfBl47uZ+r9LmsNiTGvvTFFnTeQJwbFU9uY91nQs8PsmO7efnAx9vpw8Crquq
/wbOB57Rs9xDgQ9X1aPb6UU0z7E6AHhskie1/V7SnnlcDLwyyf1niGVH4NKqegxwAXDSNP2+1/Y5BfiLnvaH0Txg+EDgpCTbJnl4u08HVdUBwHrgBUn2BN7U7uPvA/vNEJckqXs+DLyuqh4FXA6cVFVnACuBF1TVAVX1syQLgAdU1UXAcpqcMmFHmueUPQ74PlPkm7bfX1XVYuBRwO8medQGYrutqg4E3g38n2n6bNP2eTV3z5cHtHE8Enh+kgXtsNA3AE9t8+dK4H+3B3HfCzwL+B3gARuISxooiz1pCkn+Ock3klzcNp1bVT/oZ9mqWgd8DnhWO+zzMODMdvbRwOnt9OncffjKd6rqa+3009rX12mK0IfRFH/QFHjfoBn+sqCnfSq/4q5C8yPAE6fp98n2/RJgYU/7WVV1Z1V9D7gV2AN4CvBY4OIkl7WfHwQ8Dji/qtZW1S96titJ6rgk9wXuV1UXtE3LgCdN0/0omiIP7pkL1wOfaKenyzcAz0tyKU2e3J8NH2A8ref9CdP0mS4XnldVP66qnwNXAg8EHt9u8yttbMe27Q+jOah7TTUPs/7IBuKSBmra64+kLcwq4A8mPlTVCe1Ru5Vt0082cn0fB04AfgBcXFW3t0NP/gB4dpK/AgLcP8nOU2wjwFuq6l97V5rkYOCpwBOq6qdJzge234i4apr2O9v39dz978KdPdMT8wIsq6oTJ8V2xAzrlyRpwtHAHkkmztLtlWRRVV0D/Lyq1rft0+WbfWlGofxWVf0wyYfYcC6saaZ7bWwuPLeq7nbNYZIDZli/NOc8syc1/hPYPsmf9bTtsBnrOx94DPAy7jrD9VTgG1W1oKoWVtUDaY5eHjHF8p8HXtIz/n/vJLsD9wV+2BZ6D6M5sjiTrWiuEQT4I5obw2yu84A/bOMhya5JHghcCByc5P5JtqW5+F6StAWoqh8DP5y41g04hubyAYDbgZ0BkjwU2LGq9m5z4ULgLTRn+yabLt/ch+YA6Y+T7AE8vY8Qn9/z/tWN3b8pfA04qOf6vx2SPAS4Gtg3yYPbftPegEaaC57Zk4CqqvbM1DuS/CWwliaRvA649xSLPDTJmp7Pfz5pfevT3Gr5RTRDO6D5g/+pSev5BPBnwP+btPw57bVxX00CcAfwQprhoX+a5JvAt2iSzUx+Auyf5BLgx9z9uohNUlVXJnkDcE6au4b+Ejihqr6W5I00SfRmmuGnA72TmiRpaHaYlAffTpPv/iXJDsC1wIvbeR9q239GczBzqlx4OvDm3sYN5Juv04zKuRb4Sh/xbtfe9GUrZqEAq6q1SV4EnJZku7b5DVX17TQ3TzsryfdoDrI+YnO3J22qNMOJJUmSJEld4jBOSZIkSeogh3FKHdAOTdluUvMxVXX5MOKRJGmuJfkUsO+k5tdV1eeHEY80ChzGKUmSJEkd5DBOSZIkSeogiz1JkiRJ6iCLPUmSJEnqIIs9SZIkSeogiz1JkiRJ6qD/D614NnlBAZTnAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 1080x360 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# The intervals have the same width, but the number of observations\n",
    "# that falls into each interval varies\n",
    "panels = [('GrLivArea_binned', 'GrLivArea'), ('LotArea_binned', 'LotArea')]\n",
    "fig, axes = plt.subplots(1, 2)\n",
    "\n",
    "# one bar plot per discretised variable, drawn side by side\n",
    "for ax, (binned_col, raw_col) in zip(axes, panels):\n",
    "    tmp.groupby(binned_col)[raw_col].count().plot.bar(ax=ax)\n",
    "    ax.set_ylabel('Number of houses')\n",
    "    ax.set_title('Number of observations per interval')\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Now return interval boundaries instead"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "EqualWidthDiscretiser(bins=10, return_boundaries=True, return_object=False,\n",
       "                      variables=['LotArea', 'GrLivArea'])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# return_boundaries=True makes transform() output the limits of each\n",
    "# interval rather than the bin number\n",
    "ewd = EqualWidthDiscretiser(\n",
    "    bins=10,\n",
    "    variables=['LotArea', 'GrLivArea'],\n",
    "    return_boundaries=True,\n",
    ")\n",
    "\n",
    "ewd.fit(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_t = ewd.transform(X_train)\n",
    "test_t = ewd.transform(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([Interval(-inf, 768.2, closed='right'),\n",
       "       Interval(768.2, 1202.4, closed='right'),\n",
       "       Interval(1202.4, 1636.6, closed='right'),\n",
       "       Interval(1636.6, 2070.8, closed='right'),\n",
       "       Interval(2070.8, 2505.0, closed='right'),\n",
       "       Interval(2505.0, 2939.2, closed='right'),\n",
       "       Interval(2939.2, 3373.4, closed='right'),\n",
       "       Interval(3373.4, 3807.6, closed='right'),\n",
       "       Interval(4241.8, inf, closed='right')], dtype=object)"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# the intervals are the different bins into which the observations\n",
    "# were sorted\n",
    "np.sort(np.ravel(train_t['GrLivArea'].unique()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([Interval(-inf, 768.2, closed='right'),\n",
       "       Interval(768.2, 1202.4, closed='right'),\n",
       "       Interval(1202.4, 1636.6, closed='right'),\n",
       "       Interval(1636.6, 2070.8, closed='right'),\n",
       "       Interval(2070.8, 2505.0, closed='right'),\n",
       "       Interval(2505.0, 2939.2, closed='right'),\n",
       "       Interval(2939.2, 3373.4, closed='right'),\n",
       "       Interval(4241.8, inf, closed='right')], dtype=object)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.sort(np.ravel(test_t['GrLivArea'].unique()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[434.20000000000005, 434.1999999999998, 434.2000000000003, 434.1999999999998, 434.1999999999998, 434.2000000000003, 434.1999999999998]\n"
     ]
    }
   ],
   "source": [
    "# every bounded interval has (up to floating point error) the same width;\n",
    "# the first and last intervals are skipped because they extend to infinity\n",
    "intervals = np.sort(np.ravel(train_t['GrLivArea'].unique()))\n",
    "widths = [interval.right - interval.left for interval in intervals[1:-1]]\n",
    "print(widths)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "fengine",
   "language": "python",
   "name": "fengine"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.2"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
